/*

This program makes Table 1

*/
 

**Set directories
cd ""  /* PROJECT ROOT FOLDER */

/* DEFINE GLOBALS PATHS HERE */
global data  "data"
global input_orig "input"
global input "data"
global temp work
global output output
global disclosure "code"
global disc_programs "code"


use "$data/firm_country_matrix.dta", clear  // made in: make_firm_country_data.do

* Collapse to country level dataset
gen firms=1



*DROP FOREIGN FIRMS!
 replace mne_status="FOR" if mne_status=="FO2"
  bys firmid: gen firm_count=1 if _n==1
  bys mne: egen tot_firms=sum(firm_count)
 
 
 drop if mne_status=="FOR"  //  Missing affiliate information for foreign firms
 


save $data/temp_country_probs_disc.dta, replace

collapse (sum) firms import export man_aff_sales  importers exporters assemblers ///
		region_importers region_exporters region_assemblers ///
		importers_c* exporters_c* assemblers_c*, by(iso region SubregionName) fast


sort exporters


local firms=245750-2200  // from disclosed total....redefine using unrounded numbers below
*The Census Bureau has ensured appropriate access and use of confidential data has reviewed these results for disclosure avoidance protection (Project 1530, July 15 2019)
*See afft_v4.xls, sheet=agg_stats Column 3:  182k+60k+2.2k+350+1.2k=245750 is total US M firms, 2.2k is total Foreign M firms


local firms= //  from unrounded original disclosure  (fill in correct number here if you want to be precise)
            

		  

*7. Calculate probabilites
gen pr_import=importers/`firms'
gen pr_export= exporters/`firms'
gen pr_assemble=assemblers/`firms'

*conditional on assembly in country
gen pr_import_caff=importers_c_aff/assemblers
gen pr_export_caff=exporters_c_aff/assemblers 

*conditional on other trade flows in country 
gen pr_import_cexp=importers_c_exp/exporters
gen pr_export_cimp=exporters_c_imp/importers

gen pr_assemble_cexp=assemblers_c_exp/exporters
gen pr_assemble_cimp=assemblers_c_imp/importers



*conditional on activites in region
gen pr_import_craff=importers_c_raff/region_assemblers
gen pr_export_craff=exporters_c_raff/region_assemblers 
gen pr_assemble_craff=assemblers_c_raff/(region_assemblers+assemblers) // need to include firms that assemble in the country

gen pr_import_crexp=importers_c_rexp/region_exporters
gen pr_export_crexp=exporters_c_rexp/(region_exporters + exporters)   // need to include firms that export to the country
gen pr_assemble_crexp=assemblers_c_rexp/region_exporters

gen pr_import_crimp=importers_c_rimp/(region_importers+importers) // need to include firms that import from the country
gen pr_export_crimp=exporters_c_rimp/region_importers 
gen pr_assemble_crimp=assemblers_c_rimp/region_importers 

format pr* %9.2fc





*Make a table of simple and weighted averages
gen tot=1


*Make weighted variables
capture program drop wts
program define wts
  syntax , pr_var(str) wt_var(varname)
    capture drop  pr_`pr_var'_wt
    gen pr_`pr_var'_wt=pr_`pr_var'*`wt_var'
  end
  
wts , pr_var(import) wt_var(importers)
wts,  pr_var(export) wt_var(exporters)

foreach vv in import  {
   foreach ty in aff raff exp rexp rimp   {
   	wts, pr_var(`vv'_c`ty') wt_var(`vv'ers_c_`ty')
	}
	}
	
foreach vv in export  {
   foreach ty in aff raff imp rexp rimp   {
   	wts, pr_var(`vv'_c`ty') wt_var(`vv'ers_c_`ty')
	}
	}	

save $data/temp_firm_country_probs.dta, replace


*Make probability table for all firms
**********************************************
use $data/temp_firm_country_probs.dta, clear	


*Make a table for top countries
egen rank_imp_val=rank(import), field
egen rank_imp_no=rank(importers), field

egen rank_exp_val=rank(export), field
egen rank_exp_no=rank(exporters), field

egen rank_ass_val=rank(man_aff_sales), field
egen rank_ass_no=rank(assemblers), field


label variable rank_exp_val "Exp Value Rank"
label variable rank_exp_no "Exp No. Firms Rank"
label variable rank_imp_val "Imp Value Rank"
label variable rank_imp_no "Imp No. Firms Rank"
label variable rank_ass_val "Aff Value Rank"
label variable rank_ass_no "Aff No. Firms Rank"



foreach vv in import export man_aff_sales {
	gen `vv'_mill=`vv'/1000
	}
	

do $disc_programs/rounding_4sigdig_v2.do import_mill export_mill man_aff_sales
do $disc_programs/rounding_4sigdig_v2.do pr_import pr_export pr_import_craff pr_export_craff pr_import_crexp pr_export_crimp
do $disc_programs/round_N.do firms importers exporters assemblers 

	
  
    
format %9.6fc pr_import pr_export pr_import_craff pr_export_craff pr_import_crexp pr_export_crimp importers exporters assemblers import_mill export_mill man_aff_sales


*Drop countries that fail and Cond probs for Canada
foreach vv in pr_import_craff pr_export_craff pr_import_crexp pr_export_crimp {
	replace `vv'=. if iso3=="CAN"
	}

tostring pr_import_craff pr_export_craff pr_export_crimp, replace

foreach vv in pr_import_craff pr_export_craff   {
	replace `vv'="D" if iso3=="AUS" 
	}
	


*Note that the DRB required additional rounding of Italy numbers which was done manually

*Order table to match paper
gsort -pr_import
keep if  rank_imp_no<=7 | rank_exp_no<=8

sort rank_imp_no

*Country-level ranks and probabilities
export excel iso SubregionName rank* pr_import pr_export  ///
		pr_import_craff pr_export_craff pr_import_crexp pr_export_crimp ///
    using $output/afft_restat_01.xls  if (rank_imp_no<=7 | rank_exp_no<=8) , ///
    sheetreplace sheet(TABLE1) ///
    firstrow(varlabels)	keepcellfmt
    
    

    
   
